"""
1.xpath解析
2.对页面分析
3.爬取相关内容

xpath  处理xml（其中包括html）
pip install  lxml 
etree.HTML() 
"""

import  requests
from lxml import  etree
from pandas import DataFrame 

# Module-level accumulators for the scraped columns.
# NOTE: `list` shadows the builtin of the same name for the rest of this
# module; the name is kept so anything relying on these module-level
# variables keeps working. Prefer a different name in new code.
list = []   # filled from `content` by the disabled loop below
list2 = []  # filled from `content2` by the disabled loop below
info = {}   # column name -> values; DataFrame input in the disabled export

# Desktop browser User-Agent sent with every request.
# NOTE(review): presumably needed because the site rejects the default
# python-requests User-Agent -- confirm.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.46'
}

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Ten pages, addressed by the `start` offset in steps of 25: 0, 25, ..., 225.
for i in range(1, 11):
    page = (i - 1) * 25
    url = f'https://movie.douban.com/top250?start={page}&filter='
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Force UTF-8 decoding of the body instead of relying on requests' guess.
    res.encoding = 'utf-8'

    et = etree.HTML(res.text)
    # Text of the first <span> inside each entry's link (stored under 'name'
    # by the disabled export code below).
    content = et.xpath('//*[@id="content"]/div/div[1]/ol/li[*]/div/div[2]/div[1]/a/span[1]/text()')
    # Text of the <span> in each entry's second <p> (stored under 'qoute'
    # by the disabled export code below).
    content2 = et.xpath('//*[@id="content"]/div/div[1]/ol/li[*]/div/div[2]/div[2]/p[2]/span/text()')

    # Accumulation is currently disabled, so `content` / `content2` are
    # overwritten on every iteration and `list` / `list2` stay empty.
    # NOTE(review): both XPath queries return flat lists; an entry without a
    # matching second <p>/<span> contributes nothing to `content2`, so the two
    # lists are not guaranteed to line up or have equal length. Verify this
    # before re-enabling, since DataFrame(info) needs equal-length columns.
    # Also note the inner loop below reuses `i`, the page counter's name.
    # for i in content:
    #     # print(i)
    #     list.append(i)
    # for j in content2:
    #     list2.append(j)

# Bare expressions: they have no effect when run as a script and only display
# a value in an interactive session. Both are 0 while accumulation is disabled.
len(list)
len(list2)
# Disabled export of the collected columns to an Excel file.
# print(list)
# info['name'] =list
# info['qoute'] = list2
# # print(info)
# df = DataFrame(info)
# df.to_excel('./Test/crawler/TOP250.xlsx')